What is the relationship between the reasons school leaders cite for innovating and the future practices they hope to implement?

Correlation plot

For this guiding question, I look only at 2024 data. Below, you’ll see on the each pilot practice (what schools hope to implement in the next 1-5 years) on the left and catalyst for innovation, or the reason(s) school leaders cite for innovating, across the top.

# Correlations between catalyst variables and reasons for innovating
catalysts <- full %>% 
  select(starts_with("catalyst_")) %>% 
  select(-starts_with("catalyst_key"), -contains("_other")) 

pilots <- full %>% 
  select(starts_with("pilot_")) %>% 
  select(-contains("_other")) 
remove_zero_variance <- function(df) {
  # Apply function to each column
  non_constant_cols <- df[, apply(df, 2, sd) != 0]
  return(non_constant_cols)
}

catalysts_clean <- remove_zero_variance(catalysts)
pilots_clean <- remove_zero_variance(pilots) #just one removed

reasons_and_futures <- cor(pilots_clean, catalysts_clean)
rfdf <- data.frame(reasons_and_futures)
corrplot::corrplot(reasons_and_futures, method = "color", tl.col = "black", tl.cex = 0.3, c1.cex = 0.3, number.cex = 0.2, diag = TRUE)

Kind of clunky. I’m going to modify to fix the labels manually and then expand the figure to better see the correlations.

# rename catalysts
rename_catalyst_mapping <- setNames(
  dictionary$clean_labels[dictionary$variable_name %in% colnames(catalysts)], 
  dictionary$variable_name[dictionary$variable_name %in% colnames(catalysts)]
)

# rename pilots
rename_pilot_mapping <- setNames(
  dictionary$clean_labels[dictionary$variable_name %in% colnames(pilots)], 
  dictionary$variable_name[dictionary$variable_name %in% colnames(pilots)]
)

# Correlations between catalyst variables and reasons for innovating
catalysts <- full %>% 
  select(starts_with("catalyst_")) %>% 
  select(-starts_with("catalyst_key"), -contains("_other")) %>% 
  rename_with(~ rename_catalyst_mapping[.x], .cols = everything())

pilots <- full %>% 
  select(starts_with("pilot_")) %>% 
  select(-contains("_other")) %>% 
  rename_with(~ rename_pilot_mapping[.x], .cols = everything())

catalysts_clean <- remove_zero_variance(catalysts)
pilots_clean <- remove_zero_variance(pilots) #just one removed

reasons_and_futures <- cor(pilots_clean, catalysts_clean)
rfdf <- data.frame(reasons_and_futures)
corrplot::corrplot(reasons_and_futures, method = "color", tl.col = "black", tl.cex = 0.6, c1.cex = 0.3, number.cex = 0.2, diag = TRUE, cl.pos = "n")

Correlation table

rfdf %>% 
  round(2) %>% 
  datatable()

The following are related to each catalyst. Correlations above 0.15 are noted.

  • External: Assessments career (0.20), MTSS academics (0.15), assessments bilingual (0.15), multi-age (0.15)
  • Student agency: student conferences (0.18) assessments agency (0.17)
  • Teacher agency: hiring equity (0.22), enriched virtual (0.18), colead teachers (0.17)
  • Demographics: sel integrated (0.24), student conferences (0.18), advancement mastery (0.17), colead industry (0.17), antiracist (0.16), makerspace (0.15)
  • Internal: colead teachers(0.22), competency framework (0.18), extended learning (0.15), sel integrated (0.15)
  • Stakeholders: colead CBO (0.18), antiracist (0.17)
  • Inequities: UDL (0.15)
  • Absence: expanded open hours (0.18)
  • Covid: MTSS academics (0.16), interdisciplinary (0.15), multi-age (0.15)
  • No notable correlations for following catalysts: cutting edge, mental health

How have the main reasons for innovating changed since 2021?

catalyst_all_years <- import(here("data/longitudinal", "longitudinal_data.csv")) %>% 
  select(year, school_id, starts_with("catalyst")) %>% 
  filter(year == 2021 | year == 2024) 

catalyst_all_years_long <- catalyst_all_years %>% 
  select(-contains("_other"), -contains("_key")) %>% 
  pivot_longer(cols = contains("catalyst"),
               names_to = "catalyst",
               values_to = "selected",
               names_prefix = "catalyst_")

From Cycle 2, I had started to create this graph. This shows catalyst selection across schools.

library(ggrepel)

catalyst_all_years_long <- catalyst_all_years_long %>% 
  group_by(catalyst, year) %>% 
  summarize(total_selected = sum(selected))

label_positions <- catalyst_all_years_long %>% 
  group_by(catalyst) %>% 
  summarize(year = 2021, selected = first(total_selected))

catalyst_all_years_long %>% 
  ggplot(aes(x = year, y = total_selected, color = catalyst)) +
  geom_line(linewidth = 1) +
  geom_point(size = 3) +
  geom_label_repel(data = label_positions, aes(y = selected, label = catalyst)) +
  scale_x_continuous(breaks = c(2021, 2024)) +
  labs(x = "Year",
       y = "Number of Times Selected",
       title = "Catalyst Selection by Year \n Across Schools") +
  theme(legend.position = "none")

Version using percentages is added here.

n_2021 = 232
n_2024 = 189

catalyst_all_years_long <- catalyst_all_years_long %>% 
  mutate(pct = case_when(year == 2021 ~ total_selected/n_2021,
                          year == 2024 ~ total_selected/n_2024))

label_positions <- catalyst_all_years_long %>% 
  group_by(catalyst) %>% 
  summarize(year = 2021, pct = first(pct))

catalyst_all_years_long  %>% 
  ggplot(aes(x = year, y = pct, color = catalyst)) +
  geom_line(linewidth = 1) +
  geom_point(size = 3) +
  geom_label_repel(data = label_positions, aes(y = pct, label = catalyst)) +
  scale_x_continuous(breaks = c(2021, 2024)) +
  scale_y_continuous(labels = scales::percent_format()) +
  labs(x = "Year",
       y = "Percent of Times Selected",
       title = "Catalyst Selection by Year \n Across Schools") +
  theme(legend.position = "none")

What about schools who responded to the survey both years? So, looking within schools? Let’s narrow the sample and check that out.

catalyst_all_years_within <- catalyst_all_years %>% 
  filter(duplicated(school_id) | duplicated(school_id, fromLast = TRUE)) %>% 
  select(-contains("_other"), -contains("_key")) %>% 
  pivot_longer(cols = contains("catalyst"),
               names_to = "catalyst",
               values_to = "selected",
               names_prefix = "catalyst_") %>% 
  group_by(catalyst, year) %>% 
  summarize(total_selected = sum(selected))

label_positions <- catalyst_all_years_within %>% 
  group_by(catalyst) %>% 
  summarize(year = 2021, selected = first(total_selected))

catalyst_all_years_within %>% 
  ggplot(aes(x = year, y = total_selected, color = catalyst)) +
  geom_line(linewidth = 1) +
  geom_point(size = 3) +
  geom_label_repel(data = label_positions, aes(y = selected, label = catalyst)) +
  scale_x_continuous(breaks = c(2021, 2024)) +
  labs(x = "Year",
       y = "Number of Times Selected",
       title = "Catalyst Selection by Year \n Within Schools") +
  theme(legend.position = "none")

Now, in percentage. The total value here will be different than in the across graph since not ever school answered every year. Only 82 did.

n_within = 82

catalyst_all_years_within <- catalyst_all_years_within %>% 
  mutate(pct = total_selected/n_within)

label_positions <- catalyst_all_years_within %>% 
  group_by(catalyst) %>% 
  summarize(year = 2021, pct = first(pct))

catalyst_all_years_within  %>% 
  ggplot(aes(x = year, y = pct, color = catalyst)) +
  geom_line(linewidth = 1) +
  geom_point(size = 3) +
  geom_label_repel(data = label_positions, aes(y = pct, label = catalyst)) +
  scale_x_continuous(breaks = c(2021, 2024)) +
  scale_y_continuous(labels = scales::percent_format()) +
  labs(x = "Year",
       y = "Percent of Times Selected",
       title = "Catalyst Selection by Year \n Within Schools") +
  theme(legend.position = "none")

Create a version similar to the most added practices figures.

catalyst_all_years_within_change <- catalyst_all_years_within %>% 
  pivot_wider(names_from = year,
              values_from = c(total_selected, pct)) 

catalyst_all_years_within_change %>% 
  ggplot(aes(x = total_selected_2021, xend = total_selected_2024, y = reorder(catalyst, total_selected_2024), yend = catalyst)) +
  geom_segment(color = "black", linetype = "dotted") +
  geom_point(aes(x = total_selected_2021), color = "red") +
  geom_point(aes(x = total_selected_2024), color = "blue") +
  guides(col = guide_legend(nrow = 1, title = NULL)) + 
  bar_x_scale_count +
  geom_text(
    aes(x = (total_selected_2021 + total_selected_2024)/2, label = paste("Δ =", total_selected_2024 - total_selected_2021), color = factor(sign(total_selected_2024 - total_selected_2021))),
    nudge_y = .3,
    hjust = 0,
    show.legend = FALSE
  ) +
  scale_color_manual(
    values = c("red", "blue"),
    labels = c("2021", "2024")
  ) +
  labs(
    y = "Catalysts",
    x = "Times Selected",
    title = "Catalyst Selection from 2021 to 2024 Within Schools"
  ) +
  theme(
    panel.grid.major.y = element_blank(),
    legend.position = "bottom",
    axis.text.y = element_text(size = rel(0.6))
  )

How many schools selected just one catalyst in particular?

one_cat <- catalyst_all_years %>% 
  select(-contains("_other"), -contains("_key")) %>% 
  mutate(cat_select = rowSums(across(3:11))) %>% 
  filter(cat_select == 1) %>% 
  pivot_longer(cols = contains("catalyst"),
               names_to = "catalyst",
               values_to = "selected",
               names_prefix = "catalyst_") %>% 
  filter(selected == 1) %>% 
  group_by(year, catalyst) %>% 
  summarise(count = n())
one_cat %>% 
  ggplot(aes(x = count, y = catalyst, fill = as.factor(year))) +
  geom_col(position = "dodge") +
  scale_fill_manual(values = transcend_cols) +
  labs(title = "Solo-Select Catalysts",
       x = "Catalyst",
       y = "Count",
       legend = "Year")

Also going to represent this information here in a change plot. Note, this is not going to be within schools since there is only once school that selected one catalyst each year. Also note, covid and student_agency were exclusive to 2021.

one_cat %>% 
  pivot_wider(names_from = "year",
              values_from = "count") %>% 
  ggplot(aes(x = `2021`, xend = `2024`, y = reorder(catalyst, `2024`), yend = catalyst)) +
  geom_segment(color = "black", linetype = "dotted") +
  geom_point(aes(x = `2021`), color = "red") +
  geom_point(aes(x = `2024`), color = "blue") +
  geom_point(x = 1, y = "internal", color = "purple") +
  geom_point(x = 1, y = "external", color = "purple") +
  guides(col = guide_legend(nrow = 1, title = NULL)) + 
  # bar_x_scale_count +
  geom_text(
    aes(x = (`2021` + `2024`)/2 -1, label = paste("Δ =", `2024` - `2021`), color = factor(sign(`2024` - `2021`))),
    nudge_y = .3,
    hjust = 0,
    show.legend = FALSE
  )  +
  labs(
    y = "Catalysts",
    x = "Times Selected",
    title = "Solo-Select Catalyst Selection \nfrom 2021 to 2024 Across Schools"
  ) +
  theme(
    panel.grid.major.y = element_blank(),
    legend.position = "bottom",
    axis.text.y = element_text(size = rel(0.6))
  )

How often have schools that responded in both 2021 and 2024 changed their key catalysts?

cat_by_year <- catalyst_all_years %>% 
  select(year, school_id, starts_with("catalyst_key"), -contains("_other")) %>% 
  pivot_longer(cols = contains("catalyst"),
               names_to = "catalyst_key",
               values_to = "selected",
               names_prefix = "catalyst_key_") %>% 
  pivot_wider(names_from = year,
              values_from = selected) %>% 
  na.omit() %>%  #omit schools that only answered one year
  mutate(selected = case_when(`2021` == 0 & `2024` == 0 ~ "neither year",
                            `2021` == 0 & `2024` == 1 ~ "added",
                            `2021` == 1 & `2024` == 0 ~ "dropped",
                            `2021` == 1 & `2024` == 1 ~ "both years")) %>% 
  group_by(catalyst_key, selected) %>% 
  summarise(n = n())
cat_by_year %>% 
  ggplot(aes(x = n, y = catalyst_key, fill = selected)) +
  geom_col(position = "dodge") +
  scale_fill_manual(values = transcend_cols) +
  labs(title = "Catalyst Key Selection for Schools with Both Years of Data",
       x = "Catalyst",
       y = "Count",
       legend = "Status")

This is from Cycle 2. What if we looked at schools who only selected one catalyst?

one_cat_change <- catalyst_all_years %>% 
  select(-contains("_other"), -contains("_key")) %>% 
  mutate(cat_select = rowSums(across(3:11))) %>% 
  filter(cat_select == 1) %>% 
  pivot_longer(cols = contains("catalyst"),
               names_to = "catalyst",
               values_to = "selected",
               names_prefix = "catalyst_") %>% 
  filter(selected == 1)

Looks like there was just one school who meets this criteria. This school went from focusing on inequity to demographics.

For leaders who were concerned about future sustainability, what reasons did they cite for their concern and did these reasons differ across schools?

2024 only

barriers <- full %>% 
  select(school_id, starts_with("barrier"))

First, am curious how the free response category responded. I generated a wordcloud of these for barriers below.

library(wordcloud)
library(tm)

responses <- barriers$barrier_other_text

# Create a text corpus
corpus <- Corpus(VectorSource(responses))

# Text preprocessing
corpus <- tm_map(corpus, content_transformer(tolower))  # Convert to lower case
corpus <- tm_map(corpus, removePunctuation)             # Remove punctuation
corpus <- tm_map(corpus, removeNumbers)                 # Remove numbers
corpus <- tm_map(corpus, removeWords, stopwords("english"))  # Remove stopwords
corpus <- tm_map(corpus, stripWhitespace)               # Strip whitespace

# Create a document-term matrix
dtm <- TermDocumentMatrix(corpus)

# Convert the matrix to a dataframe
matrix <- as.matrix(dtm)
word_freqs <- sort(rowSums(matrix), decreasing=TRUE)
data <- data.frame(word=names(word_freqs), freq=word_freqs)

# Generate the wordcloud
set.seed(1234) # For reproducibility
wordcloud(words = data$word, freq = data$freq, min.freq = 1,
          max.words=200, random.order=FALSE, rot.per=0.35, 
          colors=brewer.pal(8, "Dark2"))

While we’re here, I’m also including a wordcloud for the catalysts for innovation here.

responses <- full$catalyst_other_text

# Create a text corpus
corpus <- Corpus(VectorSource(responses))

# Text preprocessing
corpus <- tm_map(corpus, content_transformer(tolower))  # Convert to lower case
corpus <- tm_map(corpus, removePunctuation)             # Remove punctuation
corpus <- tm_map(corpus, removeNumbers)                 # Remove numbers
corpus <- tm_map(corpus, removeWords, stopwords("english"))  # Remove stopwords
corpus <- tm_map(corpus, stripWhitespace)               # Strip whitespace

# Create a document-term matrix
dtm <- TermDocumentMatrix(corpus)

# Convert the matrix to a dataframe
matrix <- as.matrix(dtm)
word_freqs <- sort(rowSums(matrix), decreasing=TRUE)
data <- data.frame(word=names(word_freqs), freq=word_freqs)

# Generate the wordcloud
set.seed(1234) # For reproducibility
wordcloud(words = data$word, freq = data$freq, min.freq = 1,
          max.words=200, random.order=FALSE, rot.per=0.35, 
          colors=brewer.pal(8, "Dark2"))

Back to barriers –

For the rest of the options, here is what leaders selected.

barriers <- barriers %>% 
  select(-contains("_other")) %>% 
  pivot_longer(cols = contains("barrier"),
               names_to = "barrier",
               values_to = "selected",
               names_prefix = "barrier_") %>% 
  filter(selected == 1) %>% 
  group_by(barrier) %>% 
  summarize(n = n())
barriers %>% 
  ggplot(aes(reorder(barrier, n), n)) +
  geom_col(aes(fill = barrier)) +
  scale_fill_manual(values = c(transcend_cols, transcend_cols2)) +
  labs(title = "2024 Barriers to Sustainability",
       x = "Barrier",
       y = "Count") +
  theme(legend.position = "none") +
  coord_flip()